
******************************************************************************
*
*   STATA code
*   Clemens Kerr proposal for Exchange Visitor Skills List
*   August 8, 2024
*
******************************************************************************

* Session setup: pin the interpreter version, start from an empty session,
* and configure graph/output behaviour.
version 18.0
frames reset
clear all
set more off
graph set window fontface "Linux Libertine O"  // alternative: graph set window fontface "Helvetica"

* Directory macros.
* Everything is read from and written to one working directory; the three
* aliases below exist so raw data, data, and output can be pointed elsewhere.

global working "XXXXX"  // REPLACE WITH FILEPATH TO WORKING DIRECTORY

foreach dir in rawdata data output {
	global `dir' "$working"
}

* Give the default frame a descriptive name.
frame rename default main

***

* Load the main replication dataset; it stays in memory for the commands
* that follow this block.
use "$data/clemens_kerr_replication_main.dta", clear

* Share of countries that restrict more than 80 percent of disciplines,
* among the countries that restrict any discipline on the 2009 list.
* Runs inside preserve/restore so the loaded data are left untouched.
preserve 
	* One row per country x discipline (ctrycode x cip2010), keeping the
	* maximum of evsl2009 (presumably a 0/1 indicator -- confirm).
	collapse (max) evsl2009 , by(ctrycode cip2010)
	gen one = 1
	* Per country: sum of evsl2009 and number of disciplines.
	collapse (sum) evsl2009 one, by(ctrycode)
	* Store the ratio as double. The default float storage rounds 0.8 up to
	* about 0.80000001, which compares as greater than the double literal
	* 0.8, so a country at exactly 80 percent would be counted as "more than
	* 80 percent".
	gen double frac = evsl2009/one
	* Missing values compare as larger than any number in Stata, so exclude
	* them explicitly from both counts.
	count if frac>0 & !missing(frac)
	scalar all_pos = r(N)
	* frac>0.8 already implies frac>0, so no separate positivity test.
	count if frac>0.8 & !missing(frac)
	scalar restrict_80 = r(N)
	dis scalar(restrict_80)/scalar(all_pos)		// Share (0-1 scale) of countries that restrict more than 80 percent of disciplines, among those that restrict any disciplines 
restore


* TABLE 2
* Lists the countries falling in each cell of the table, one -tab- per cell.
* Commands run in the same order as the table is laid out: first the four
* base-group cells for groups 1-3, then the three reassignment cells for
* groups 1-3.

* Cells for countries whose base group and final group coincide.
forvalues g = 1/3 {
	local same "group_base == `g' & group == `g'"

	tab country if `same' & !small & !high_frac
	tab country if `same' & small_and_underrepresented & !high_frac
	tab country if `same' & !small & deparature_and_strong
	tab country if `same' & small_and_underrepresented & deparature_and_strong

}

* Cells defined by the two "but not" flags, by final group.
forvalues g = 1/3 {
	local final "group == `g'"

	tab country if `final' & small_but_not_underrepresented & !deparature_but_not_strong
	tab country if `final' & !small_but_not_underrepresented & deparature_but_not_strong
	tab country if `final' & small_but_not_underrepresented & deparature_but_not_strong

}


* FIGURE 2
* Fraction of fields on the 2009 list and on the proposed list, plotted
* against log GDP per capita. Writes overview_old.png and overview_new.png.

* Only consider 2-digit codes that existed in 2009: blank out the 2009
* indicator for the codes listed here.
replace evsl2009 = . if inlist(cip2020_2,"28","32","33","34","35","36","37","53")

* Average both list indicators within each gdppc value, keeping one country
* code as the marker label. This replaces the dataset in memory.
* NOTE(review): grouping by gdppc treats each distinct gdppc value as one
* country; countries with equal or missing gdppc would be pooled -- confirm
* that gdppc is unique per country.
collapse (mean) evsl2009 evsl_new (first) ctrycode, by(gdppc)
gen lgdppc = ln(gdppc)

* Minor x ticks at 2,000-9,000 and 20,000-90,000, in logs.
local xminor
foreach base in 1000 10000 {
	forvalues k = 2/9 {
		local xminor `xminor' `=ln(`k'*`base')'
	}
}

* Same graph twice: 2009 list in red ("old"), proposed list in green ("new").
foreach which in old new {
	if "`which'" == "old" {
		local yvar  evsl2009
		local hue   red
		local ytext "Fraction of all fields on 2009 list"
	}
	else {
		local yvar  evsl_new
		local hue   green
		local ytext "Fraction of all fields on proposed list"
	}

	* Local polynomial smooth overlaid on a labelled scatter.
	graph twoway lpoly `yvar' lgdppc, bw(.25) lcolor(`hue'%50) ///
		|| scatter `yvar' lgdppc, ///
			msize(*1.33) mlwidth(*.33) mlcolor(black%33) mfcolor(`hue'%33) ///
			mlab(ctrycode) mlabsize(tiny) mlabangle(45) mlabcolor(gs10) mlabpos(1) ///
			aspect(.7) ///
			xlabel(`=ln(1000)' "1,000" `=ln(10000)' "10,000" `=ln(100000)' "100,000") ///
			xmtick(`xminor') ///
			ylabel(0(.2)1, format(%02.1f)) ymtick(0(.1)1) legend(off) ///
			xtitle("GDP per capita (PPP, {it:log scale})") ///
			ytitle("`ytext'")

	graph export "$output/overview_`which'.png", replace width(1600)
}

* FIGURE 4
* Country ranks by GDP per capita versus ranks by HDI, drawn as country-code
* labels with no markers. Writes hdi.png.

use "$data/hdi_gdppc_comparison.dta", clear 

* Correlation between the two rankings.
correlate rank_gdppc rank_hdi

* Both axes are reversed so that rank 1 sits at the top right.
twoway scatter rank_gdppc rank_hdi, ///
	msymbol(none) mlabel(ctrycode) mlabposition(0) mlabsize(*.5) ///
	aspectratio(1) ///
	xscale(reverse) yscale(reverse) ///
	ytitle("Rank GDP per capita at PPP") xtitle("Rank HDI")

graph export "$output/hdi.png", replace width(1600)
